# --- Imports and global plotting configuration ------------------------------
import matplotlib.pyplot as plt
plt.rcParams.update({'figure.figsize': (20, 8)})  # wide default figures for time-series plots
from models_gaussian_2d import *
from eval_utils import *
import time
2022-09-22 15:05:55.886527: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory 2022-09-22 15:05:55.886564: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
WARNING:tensorflow:From /Ziob/kabalce/miniconda3/envs/hmm/lib/python3.10/site-packages/tensorflow/python/compat/v2_compat.py:107: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version. Instructions for updating: non-resource variables are not supported in the long term
from sklearn.decomposition import PCA
from celluloid import Camera  # matplotlib animation helper used below
import matplotlib.cm as cm
from IPython.display import display, Markdown, Latex, HTML
import os
# os.chdir('./wodociagi')
import pandas as pd
import numpy as np
plt.rcParams.update({'figure.figsize': (20, 8)})  # NOTE(review): duplicates the rcParams call at the top of the file
# Load the raw hydraulics sheet and forward-fill missing values.
df_main = pd.read_excel('../../data/Dane_Uwr.xlsx', sheet_name='Surowe_hydraulika').ffill()
df_main.columns = ['mtime', 'P1', 'V1', 'Q1']
df_main = df_main.ffill()  # NOTE(review): redundant — already forward-filled on load
# First difference of the (cumulative) V1 series, left-padded with 0 to keep alignment.
df_main["V_delta"] = np.array([0] + (df_main.V1[1:].values - df_main.V1[:-1].values).tolist())
plt.plot(df_main["mtime"], df_main["V1"])
plt.title("Original V1")
plt.show()
plt.plot(df_main["mtime"], df_main["V_delta"])
plt.title("Original V delta")
plt.show()
# Zero-out outlier jumps larger than 1e3 (presumably meter resets — TODO confirm threshold).
df_main.loc[(df_main.V_delta.abs() > 1e+3), "V_delta"] = 0
plt.plot(df_main["mtime"], df_main["V_delta"])
plt.title("V delta without outliers")
plt.show()
# Aggregate the deltas to hourly sums keyed by the rounded timestamp.
df_main = pd.concat([df_main.V_delta, df_main.mtime.dt.round("H")], axis=1).groupby("mtime").sum().reset_index()
# Hourly-aggregated series, and its smoothed seasonal trend for 2019.
plt.plot(df_main["mtime"], df_main["V_delta"])
plt.title("V delta hourly concatenation")  # typo fixed: "ourly" -> "hourly"
plt.show()
plt.plot(df_main.loc[(df_main.mtime.dt.year == 2019), "mtime"],
         df_main.loc[(df_main.mtime.dt.year == 2019), "V_delta"])
# Two chained centred rolling means approximate the seasonal component.
# NOTE(review): the "* 6" window factors look like leftovers from a 10-minute
# sampling rate, but the series is hourly after the groupby above — confirm.
seasonal_changes = df_main.V_delta.rolling(24 * 42 * 6, center=True, min_periods=2).mean().rolling(24 * 7 * 6, center=True, min_periods=2).mean()[(df_main.mtime.dt.year == 2019)]
plt.plot(df_main.loc[(df_main.mtime.dt.year == 2019), "mtime"], seasonal_changes, color="red")
plt.title("V delta hourly concatenation with seasonal changes - 07.2019 ")  # typo fixed: "ourly" -> "hourly"
plt.show()
# Deseasonalized 2019 series: hourly deltas minus the smoothed seasonal trend.
data = df_main.V_delta[(df_main.mtime.dt.year == 2019)] - seasonal_changes
plt.plot(data)
plt.title("Data")
# Segment lengths for HMM fitting: full weeks of hourly samples plus the
# remainder.  (24 * 7 * 6 = 1008 values per segment; the "* 6" factor is
# presumably a leftover from 10-minute-resolution data — TODO confirm.)
# The remainder segment is appended only when non-empty, so a zero-length
# final sequence is never produced when the series divides evenly.
SEGMENT_LEN = 24 * 7 * 6
n_full, remainder = divmod(data.shape[0], SEGMENT_LEN)
lengths = np.array([SEGMENT_LEN] * n_full + ([remainder] if remainder else []))
# Observation matrix as a (T, 1) column vector.
Y_true = data.values.reshape(-1, 1)
plt.plot(Y_true)
plt.title("Hourly deviations from seasonality")
plt.vlines(np.array([24*i for i in range(Y_true.shape[0] // 24)]), -10, 7.5, color='grey')  # day boundaries
plt.show()
plt.plot(pd.Series(Y_true[:, 0]).rolling(6, center=True, min_periods=2).mean())
plt.title("Hourly deviations from seasonality")
plt.vlines(np.array([24*i for i in range(Y_true.shape[0] // 24)]), -100, 100, color='grey')
plt.show()
print(Y_true.shape)
# Replace the observations with a centred 6-sample rolling mean (smoothing);
# shape is preserved because min_periods=2 fills the edges.
Y_true = pd.Series(Y_true[:, 0]).rolling(6, center=True, min_periods=2).mean().values.reshape(-1, 1)
print(Y_true.shape)
(8760, 1) (8760, 1)
n = 10  # number of hidden states
l = 4   # embedding dimension (l_uz)
lr = 0.03271365590433669
ITER = 714450
lr = 0.1       # NOTE(review): immediately overrides the tuned value above
ITER = 70000   # NOTE(review): immediately overrides the value above
TOLERANCE = 1e-4  # convergence tolerance for the logging monitor
def em_scheduler(max_lr, it, total_iter=None):
    """Cosine learning-rate schedule for the co-occurrence optimization.

    For the first two thirds of training the rate follows a cosine ramp that
    reaches ``max_lr`` at ``ceil(2 * total_iter / 3)``; for the final third it
    decays with a cubed cosine.

    Parameters
    ----------
    max_lr : float
        Peak learning rate.
    it : int
        Current iteration.
    total_iter : int, optional
        Total number of iterations.  Defaults to the module-level ``ITER``
        constant, preserving the original call signature.

    Returns
    -------
    float
        Learning rate to use at iteration ``it``.
    """
    if total_iter is None:
        total_iter = ITER  # fall back to the notebook-level constant
    warmup_end = np.ceil(2 * total_iter / 3)
    if it <= warmup_end:
        return max_lr * np.cos((warmup_end - it / 2) / total_iter * np.pi * .67)
    return max_lr * np.cos(3 * (warmup_end - it) * np.pi * .33 / total_iter) ** 3
# Co-occurrence M-step configuration (learning rate, epochs, embedding size).
# NOTE(review): "mstep_cofig" is a typo for "mstep_config", kept because it is
# used consistently below.
mstep_cofig = {"cooc_lr": lr, "cooc_epochs": ITER, "l_uz": l,
'loss_type': 'square', "scheduler": em_scheduler}
t = time.localtime()
true_values = None  # no ground-truth parameters available for this real dataset
# Weights & Biases run metadata and logged hyperparameters.
wandb_params = {
"init": {
"project": "gaussian-dense-hmm-wodociagi",
"entity": "cirglaboratory",
"save_code": True,
"group": f"first-runs-2",
"job_type": f"{t.tm_year}-{t.tm_mon}-{t.tm_mday}",
"name": f"test3",
"reinit": True
},
"config": {
"n": n,
"s": len(lengths),
"T": lengths.max(),
"model": None,
"m": None,
"l": None,
"lr": lr,
"em_epochs": None,
"em_iter": None,
"cooc_epochs": ITER,
"simple_model": None
}
}
# NOTE(review): exact duplicate of the mstep_cofig definition above.
mstep_cofig = {"cooc_lr": lr, "cooc_epochs": ITER, "l_uz": l,
'loss_type': 'square', "scheduler": em_scheduler}
# Monitor that logs training metrics to wandb; n_iter=0 disables EM iterations.
hmm_monitor = DenseHMMLoggingMonitor(tol=TOLERANCE, n_iter=0, verbose=True,
wandb_log=True, wandb_params=wandb_params, true_vals=true_values,
log_config={'metrics_after_convergence': True})
# Dense HMM with full covariances, trained via the co-occurrence scheme only.
densehmm = GaussianDenseHMM(n, mstep_config=mstep_cofig,
covariance_type='full', opt_schemes={"cooc"},
logging_monitor=hmm_monitor,
init_params="stmc", params="stmc", early_stopping=True)
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving. wandb: Currently logged in as: kabalce (cirglaboratory). Use `wandb login --relogin` to force relogin
/Ziob/kabalce/GausianDenseHMM/code_dense_hmm/wodociagi/wandb/run-20220922_150702-1rq3q0lj
# Fit the model on the smoothed observations and record wall-clock time.
start = time.perf_counter()
densehmm.fit_coocs(Y_true,lengths)
time_tmp = time.perf_counter() - start  # fit duration in seconds
2022-09-22 15:07:11.602083: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-22 15:07:11.602632: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-09-22 15:07:11.605438: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory
2022-09-22 15:07:11.609816: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory
2022-09-22 15:07:11.612441: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcufft.so.10'; dlerror: libcufft.so.10: cannot open shared object file: No such file or directory
2022-09-22 15:07:11.615484: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcurand.so.10'; dlerror: libcurand.so.10: cannot open shared object file: No such file or directory
2022-09-22 15:07:11.618686: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcusolver.so.11'; dlerror: libcusolver.so.11: cannot open shared object file: No such file or directory
2022-09-22 15:07:11.621313: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcusparse.so.11'; dlerror: libcusparse.so.11: cannot open shared object file: No such file or directory
2022-09-22 15:07:11.625628: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudnn.so.8'; dlerror: libcudnn.so.8: cannot open shared object file: No such file or directory
2022-09-22 15:07:11.625644: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1850] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
2022-09-22 15:07:11.646963: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-09-22 15:07:11.695360: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:354] MLIR V1 optimization pass is not enabled
1 -111936.2803 +nan
2 -109119.1431 +2817.1372
3 -107760.4405 +1358.7026
4 -106493.8307 +1266.6098
5 -105024.9959 +1468.8348
6 -103302.3239 +1722.6720
7 -102025.0420 +1277.2819
8 -101349.4184 +675.6236
9 -101149.5445 +199.8739
10 -101061.1022 +88.4423
11 -101018.6675 +42.4348
12 -100993.4459 +25.2216
13 -100963.2177 +30.2282
14 -100906.0119 +57.2058
15 -100785.9029 +120.1091
16 -100492.1321 +293.7708
17 -99945.6941 +546.4380
18 -99504.3104 +441.3837
19 -98727.2713 +777.0391
20 -97916.0223 +811.2490
21 -97299.8312 +616.1911
22 -96865.8197 +434.0115
23 -96713.8580 +151.9617
24 -96630.6961 +83.1620
25 -96562.4897 +68.2064
26 -96498.5590 +63.9306
27 -96432.7724 +65.7866
28 -96361.5641 +71.2083
29 -96283.2469 +78.3172
30 -96197.6848 +85.5621
31 -96106.5033 +91.1815
32 -96013.8802 +92.6231
33 -95927.8293 +86.0509
34 -95861.1785 +66.6508
35 -95829.3068 +31.8717
36 -95839.8128 -10.5059
37 -95880.0889 -40.2762
38 -95926.6728 -46.5839
39 -95965.8657 -39.1929
40 -95995.1739 -29.3082
41 -96015.9843 -20.8104
42 -96029.9209 -13.9366
43 -96038.1488 -8.2278
44 -96041.4660 -3.3173
45 -96040.4551 +1.0109
46 -96035.5763 +4.8789
47 -96027.2163 +8.3599
48 -96014.3791 +12.8372
49 -95995.0550 +19.3241
50 -95972.2024 +22.8526
51 -95946.6646 +25.5378
52 -95919.1981 +27.4665
53 -95890.4704 +28.7276
54 -95861.0618 +29.4086
55 -95831.4699 +29.5919
56 -95802.1174 +29.3525
57 -95773.3607 +28.7567
58 -95745.4983 +27.8624
59 -95718.7795 +26.7189
60 -95693.4102 +25.3693
61 -95669.5583 +23.8519
62 -95647.3567 +22.2016
63 -95626.9049 +20.4518
64 -95608.2697 +18.6352
65 -95591.4858 +16.7839
66 -95576.5561 +14.9296
67 -95563.4529 +13.1033
68 -95552.1189 +11.3340
69 -95542.4706 +9.6483
70 -95534.4007 +8.0699
# densehmm_org_aparms = densehmm
# densehmm_100k = densehmm
# densehmm_200k = densehmm
# densehmm_700k = densehmm
# densehmm_500k = densehmm
# Project the recorded embedding trajectories to 2-D with a PCA fitted on the
# final snapshot, so all frames share one coordinate system.
z_init = np.transpose(hmm_monitor.z[-1])
pca_z = PCA(n_components=2).fit(z_init)
z = [pca_z.transform(z_init)] + [pca_z.transform(np.transpose(x)) for x in hmm_monitor.z]
z0 = list(hmm_monitor.z0)  # 1-D start-probability embeddings, no PCA needed
u_init = hmm_monitor.u[-1]
pca_u = PCA(n_components=2).fit(u_init)
u = [pca_u.transform(u_init)] + [pca_u.transform(x) for x in hmm_monitor.u]
# Draw embeddings trajectories
def draw_embeddings(embeding_list, name="?"):
    """Animate a sequence of embedding snapshots and log the video to wandb.

    Each element of ``embeding_list`` is one snapshot (rows = points); each
    snapshot becomes one animation frame.  2-D snapshots are drawn as x/y
    scatter plots; 1-D snapshots are plotted against their index.  Every point
    keeps a fixed colour across frames so it can be followed over time.

    Parameters
    ----------
    embeding_list : list of np.ndarray
        Snapshots with a constant number of rows.
    name : str
        Label used in the plot title and the wandb log key.
    """
    fig = plt.figure(figsize=(5, 5))
    camera = Camera(fig)
    cmap = cm.rainbow(np.linspace(0, 1, len(embeding_list[0])))
    for z_el in embeding_list:
        if z_el.shape[1] > 1:
            plt.scatter(z_el[:, 0], z_el[:, 1], color=cmap)
        else:
            plt.scatter(np.arange(z_el.shape[0]), z_el, color=cmap)
        camera.snap()
    # Typo fixed in title and wandb key: "Embaddings" -> "Embeddings".
    plt.title(f"Embeddings trajectory: {name}")
    animation = camera.animate().to_html5_video()
    wandb.log({f"Embeddings trajectory: {name}": wandb.Html(animation)})
    display(HTML(animation))
    plt.close()
# Animate each embedding family's trajectory.
draw_embeddings(z, "z")
draw_embeddings(z0, "z0")
draw_embeddings(u, "u")
# Draw graph scaled with width and alpha
representation = densehmm.get_representations()
u_fin, z_fin, z0_fin = representation  # final (u, z, z0) learned representations
u_fin  # notebook display of the final u representation
array([[ 0.81522926, 0.7749844 , -1.36163476, 0.19171012],
[ 1.25762801, -1.64764015, -0.86976148, -0.81340828],
[-0.34090178, -0.36307474, 0.91574129, -0.88924059],
[-0.61967726, -1.28221738, -1.29863423, 1.1147602 ],
[-1.87944403, -2.08516514, 1.0882987 , -1.85575953],
[ 1.54160007, 0.39915993, -0.87551868, 0.071946 ],
[ 1.00168665, -1.1307586 , -2.46512821, 0.76002356],
[-1.07714347, -1.33433337, -0.45162947, 1.11889856],
[-0.09443906, 0.53315114, 0.00746519, -1.81783205],
[-1.22671903, 0.27836264, 0.07269379, -0.77325455]])
z_fin  # notebook display of the final z representation
array([[ 0.7089109 , 0.45454697, 1.07223965, -1.55627429, 0.56180282,
1.28863812, -0.07946448, -0.17687036, 0.6124697 , 0.01874523],
[ 1.73107433, -0.25286488, 0.30963567, 0.09742843, -1.62417265,
1.30218801, -1.41790256, -0.40837113, 0.31231638, -0.18306023],
[-1.13546512, 1.74606951, -1.20980683, -1.55247084, 0.24461675,
0.14658406, -0.91979523, 0.31339505, 0.72595757, -2.27086582],
[-0.36310786, 0.10921562, 1.94817127, 0.75534236, -0.93796748,
-0.04715727, -0.01200375, 1.42588812, 0.27495063, -0.07798396]])
z0_fin  # notebook display of the final z0 representation
array([[ 0.28449496],
[ 0.17474797],
[-1.46511687],
[ 1.34018219]])
np.matmul(u_fin, z_fin)  # unnormalized transition scores (softmaxed below into A_trained)
array([[ 3.39595782, -2.18197754, 3.13488313, 1.0654902 , -1.31360689,
1.85107642, 0.08648979, -0.61404356, -0.1944347 , 2.95055236],
[-0.67770261, -0.61921955, 0.30589518, -1.38186354, 3.93278332,
-0.61404492, 3.04602284, -0.9819966 , -0.59938299, 2.36373632],
[-1.58708038, 1.43868196, -3.31821233, -1.59817985, 1.45626088,
-0.73592337, -0.28972607, -0.77240415, 0.09810587, -1.95010483],
[-1.58913387, -2.10320109, 2.6813776 , 3.69758074, 0.37112947,
-2.71115596, 3.04839791, 1.8157609 , -1.41624037, 3.08519769],
[-5.50381853, 1.3705367 , -7.59282452, -0.36950971, 4.33764941,
-4.89016047, 2.12717414, -1.12109886, -1.52251731, -1.98018065],
[ 2.75182936, -0.92106272, 2.97593048, -0.94670197, -0.06387891,
2.37461604, 0.11596195, -0.60746502, 0.45303971, 1.93840214],
[ 1.27577596, -3.48003598, 5.18690995, 2.73205042, 1.08340855,
-0.57883783, 3.78199704, 0.59575009, -1.32026123, 5.76447951],
[-2.96690035, -0.81858149, 1.15807731, 3.09262174, 0.40207894,
-3.24456693, 2.379525 , 2.18930373, -1.09667387, 1.16240585],
[ 1.50756801, -0.3632431 , -3.48665831, -1.18575789, 0.78790782,
0.65938352, -0.73349746, -2.79070564, -0.38572388, 0.02544024],
[-0.18953453, -0.58551262, -2.82352352, 1.23930481, -0.39821345,
-1.17119608, -0.35479196, -0.97649763, -0.82422524, -0.17872865]])
# Final-frame PCA-reduced embeddings (last element of each trajectory list).
z0_fin_red = z0[-1]
z_fin_red = z[-1]
u_fin_red = u[-1]
# Row-wise softmax of u·z yields the learned transition matrix A.
A_trained = np.exp(np.matmul(u_fin, z_fin)) / np.exp(np.matmul(u_fin, z_fin)).sum(axis=1).reshape(-1, 1)
# Keep only the largest entry per row (most likely next state), zero elsewhere.
A_largest = A_trained * (A_trained == A_trained.max(axis=1).reshape(-1, 1))
A_largest
array([[0.35506057, 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. ],
[0. , 0. , 0. , 0. , 0.58791086,
0. , 0. , 0. , 0. , 0. ],
[0. , 0. , 0. , 0. , 0.36101667,
0. , 0. , 0. , 0. , 0. ],
[0. , 0. , 0. , 0.38015969, 0. ,
0. , 0. , 0. , 0. , 0. ],
[0. , 0. , 0. , 0. , 0.84803103,
0. , 0. , 0. , 0. , 0. ],
[0. , 0. , 0.33842444, 0. , 0. ,
0. , 0. , 0. , 0. , 0. ],
[0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0.56303925],
[0. , 0. , 0. , 0.43628568, 0. ,
0. , 0. , 0. , 0. , 0. ],
[0.3786148 , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. ],
[0. , 0. , 0. , 0.41953012, 0. ,
0. , 0. , 0. , 0. , 0. ]])
import itertools

# Transition graph: arrows run from each PCA-reduced z embedding (origin) to
# each u embedding (head); arrow width scales with the transition probability.
# (Loop-body indentation reconstructed — it was lost in the notebook export.)
plt.figure(figsize=(10, 10))
for i, j in itertools.product(range(n), range(n)):
    plt.arrow(z_fin_red[i, 0], z_fin_red[i, 1],
              u_fin_red[j, 0] - z_fin_red[i, 0],
              u_fin_red[j, 1] - z_fin_red[i, 1],
              width=A_trained[i, j] / 15)
plt.scatter(z_fin_red[:, 0], z_fin_red[:, 1], color=cm.rainbow(np.linspace(0, 1, n)))
plt.scatter(u_fin_red[:, 0], u_fin_red[:, 1], color=cm.rainbow(np.linspace(0, 1, n)))
plt.title("Transpositions")  # NOTE(review): probably means "Transitions"
plt.show()
z_fin_red  # notebook display
array([[-0.72267441, 1.63276854],
[ 2.10379795, -0.08135393],
[-0.87688568, 0.78568821],
[-1.70206594, -0.68215358],
[ 1.13645818, -1.44760401],
[ 0.64435133, 1.58403377],
[-0.32387269, -1.47386778],
[ 0.38551427, -0.39773221],
[ 1.0853405 , 0.46823828],
[-1.72996349, -0.38801729]])
# Joint 2-D PCA of the concatenated [u | z^T] representation, giving one
# shared coordinate per state.
uz_fin = np.concatenate([u_fin, np.transpose(z_fin)], axis=1)
pca_uz = PCA(n_components=2)
uz_fin_red = pca_uz.fit_transform(uz_fin)
uz_fin_red  # notebook display
array([[-2.29324113, -0.86019254],
[ 0.09265259, -1.07771949],
[ 0.78577611, -0.5595661 ],
[-0.74000947, 2.60998982],
[ 3.69374412, -0.08307797],
[-1.90316739, -1.72143021],
[-1.35671569, 1.64144196],
[ 0.43200548, 1.44389191],
[ 0.63241565, -1.94894777],
[ 0.65653973, 0.55561038]])
# Transition graph in the joint (u, z) PCA space; both arrow width and opacity
# encode the transition probability.  Self-loops are skipped.
# (Loop-body indentation reconstructed — it was lost in the notebook export.)
plt.figure(figsize=(10, 10))
for i, j in itertools.product(range(n), range(n)):
    if i == j:
        continue
    plt.arrow(uz_fin_red[i, 0], uz_fin_red[i, 1],
              uz_fin_red[j, 0] - uz_fin_red[i, 0],
              uz_fin_red[j, 1] - uz_fin_red[i, 1],
              width=(A_trained[i, j] / 1.5 + 0.33) * 0.06,
              alpha=A_trained[i, j] / 1.5 + 0.33)  # , color=cm.rainbow(A_trained[i, j])
plt.scatter(uz_fin_red[:, 0], uz_fin_red[:, 1], color=cm.rainbow(np.linspace(0, 1, n)))
# NOTE(review): duplicate of the scatter call above — intentional? Kept for parity.
plt.scatter(uz_fin_red[:, 0], uz_fin_red[:, 1], color=cm.rainbow(np.linspace(0, 1, n)))
plt.title("Transpositions")  # NOTE(review): probably means "Transitions"
plt.show()
# Predicted hidden state per hourly sample, shaped (1, T) for imshow below.
states = densehmm.predict(Y_true).reshape(1, -1)
interval = 7 * 24  # one week of hourly samples
# One figure per week: the observed series over a colour band showing the
# predicted state sequence.
# (Loop-body indentation reconstructed — it was lost in the notebook export.)
for i in range(Y_true.shape[0] // interval + 1):
    plt.figure(figsize=(10, 10))
    window = slice(i * interval, (i + 1) * interval)
    plt.plot(Y_true[window])
    plt.imshow(states[:, window],
               extent=(0, Y_true[window].shape[0], -100, 100),
               cmap=cm.rainbow)
    plt.show()
# Histogram of predicted states; bin edges at i - 0.5 give one bin per state.
plt.hist(densehmm.predict(Y_true).reshape(-1), [i - 0.5 for i in range(n+1)])
plt.title("State frequency")
plt.show()
# State-by-hour-of-day frequency heatmap.
# pivot() is called with keyword arguments: the positional form was removed in
# pandas 2.0, keywords work in 1.x as well.
# NOTE(review): values=0 relies on pandas < 2.0 naming the value_counts column
# 0 after reset_index(); pandas >= 2.0 names it "count" — verify the version.
plt.matshow(
    pd.DataFrame({"states": states.reshape(-1), "hour": np.arange(states.shape[1]) % 24})
    .value_counts()
    .reset_index()
    .sort_values(["states", "hour"])
    .pivot(index="states", columns="hour", values=0)
    .fillna(0)
    .values
)
plt.ylabel("state")
plt.xlabel("hour")
plt.title("State frequency in 24 hours")
plt.show()
# Per-weekday state-frequency heatmaps: fold the hour-of-week index into 7
# day-sized slices.  pivot() uses keyword arguments (positional form removed
# in pandas 2.0); values=0 assumes pandas < 2.0 column naming — see above.
weekly = pd.DataFrame({"states": states.reshape(-1), "hour": np.arange(states.shape[1]) % (24 * 7)}).value_counts().reset_index().sort_values(["states", "hour"]).pivot(index="states", columns="hour", values=0).fillna(0).values
l = weekly.shape[1] // 7  # hours per day; NOTE(review): shadows the embedding size `l` defined earlier
# Day labels start at Tuesday — presumably because the series starts on
# 2019-01-01, which was a Tuesday; confirm against df_main.mtime.
days = ['wtorek', 'środa', 'czwartek', 'piątek', 'sobota', 'niedziela', 'poniedziałek']
# (Loop-body indentation reconstructed — it was lost in the notebook export.)
for i in range(7):
    plt.matshow(weekly[:, (l * i):(l * (i + 1))])
    plt.ylabel("state")
    plt.xlabel("hour")
    plt.title(f"State frequency in 24 hours: {days[i]}")
    plt.show()